#install.packages("ggplot2")
library(ggplot2)
# Load the housing data from housing.csv in the working directory.
txhousing <- read.csv("housing.csv")
# View() opens a data viewer, so only call it in an interactive session;
# non-interactive runs (Rscript, knitting) skip it.
if (interactive()) {
  View(txhousing)
}
# Q1: class, column names and dimensions of the data, combined into a single
# vector (c() coerces the numeric dimensions to character).
cl <- class(txhousing)
nam <- names(txhousing)
dimen <- dim(txhousing)
Q1 <- c(cl, nam, dimen)
Q1
 [1] "tbl_df"     "tbl"        "data.frame" "city"       "year"      
 [6] "month"      "sales"      "volume"     "median"     "listings"  
[11] "inventory"  "date"       "8602"       "9"         
# Q2: ls.str() listing for the data frame.
Q2 <- ls.str(txhousing)
# Q3: summary() of the median column.
Q3 <- summary(txhousing[["median"]])
# Q4: number of rows recorded for each year.
Q4 <- table(txhousing[["year"]])
print(Q4)

2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 
 552  552  552  552  552  552  552  552  552  552  552  552  552  552  552  322 
# Distinct city values, then the number of rows per city.
unique(txhousing[["city"]])
table(txhousing[["city"]])
# Q5: number of distinct cities.
Q5 <- length(unique(txhousing[["city"]]))
# Single horizontal boxplot of the sales column.
boxplot(
  txhousing[["sales"]],
  main = "Title",
  xlab = "stuff",
  horizontal = TRUE
)

NA
NA
# Passing two vectors draws one box per vector (year and sales side by side);
# it does not group sales by year.
boxplot(txhousing$year, txhousing$sales)

## Horizontal comparative boxplot: one box of sales per year.
# With horizontal = TRUE the x-axis carries the sales values and the y-axis
# carries the year groups, so the axis titles are assigned accordingly.
boxplot(txhousing$sales ~ txhousing$year,
        horizontal = TRUE,
        xlab = "Sales",
        ylab = "Year",
        frame = FALSE)

NA
NA
# q8: horizontal boxplots of sales, one box per year
boxplot(
  sales ~ year,
  data = txhousing,
  main = "Sales by Year in Texas Housing",
  xlab = "Sales",
  ylab = "Year",
  horizontal = TRUE,
  las = 1,
  frame = FALSE   # or bty = "n"
)



# Horizontal boxplots of median sale price, one box per year.
boxplot(txhousing$median ~ txhousing$year,
        horizontal = TRUE,
        xlab = "Median Sale Price",
        ylab = "Year",
        main = "Housing Prices",
        las = 1,              # axis labels always horizontal
        col = "lightgreen",   # box fill; boxplot() takes `col`, not `color`
        frame = FALSE)

# Vertical boxplots of sales, one box per year.
# With horizontal = FALSE the year groups sit on the x-axis and the sales
# values on the y-axis, so the axis titles are assigned accordingly.
boxplot(txhousing$sales ~ txhousing$year,
        horizontal = FALSE,
        las = 1,
        xlab = "Year",
        ylab = "Sales",
        main = "Sales by Year in Texas Housing",
        frame = FALSE)   # or bty = "n"

# Horizontal boxplots of median sales price, one light-green box per year
boxplot(
  median ~ year,
  data = txhousing,
  main = "Median Sales Price by Year",
  xlab = "Median Sales Price",
  ylab = "Year",
  col = "lightgreen",   # fill colour of the boxes
  horizontal = TRUE,
  las = 1,              # keep axis labels horizontal
  frame = FALSE         # draw no frame around the plot
)

# Q9a: scatterplot of median against sales, then the least-squares line
plot(
  x = txhousing$sales,
  y = txhousing$median,
  cex = 0.75,
  las = 1,
  pch = 16
)

# Overlay the fitted line from regressing median on sales.
abline(
  reg = lm(median ~ sales, data = txhousing),
  lwd = 4,
  col = "red"
)

# Scatterplot of median against sales via the formula interface, with titles
plot(
  median ~ sales,
  data = txhousing,
  main = "Median Price vs. Sales",
  xlab = "Sales",
  ylab = "Median Sales Price",
  cex = 0.75,   # slightly smaller points
  pch = 16      # solid dots
)

# Q10: number of rows whose median is missing
Q10 <- sum(is.na(txhousing[["median"]]))
#sum(proportions(is.na(txhousing$median)))
# Q11: proportion of rows whose median is missing (mean of a logical vector)
Q11 <- mean(is.na(txhousing[["median"]]))

# q12: row positions of the missing medians, and how many there are
NA_meidan_index <- which(is.na(txhousing[["median"]]))
length(NA_meidan_index)
[1] 616
# q13: rows with a missing median, restricted to four columns
txhousing[
  NA_meidan_index,
  c("city", "year", "sales", "median")
]
NA
# q14: keep only rows with a recorded median, and only the modelling columns.
# A logical mask is used rather than txhousing[-NA_meidan_index, ]: a negated
# empty index selects zero rows, which would silently empty the data set if
# no medians were missing.
txhousing_clean <- txhousing[!is.na(txhousing$median),
                             c("city", "year", "sales", "median", "month")]
## pairwise correlations, computed on complete observations only

with(txhousing, cor(median, listings, use = "complete.obs"))
[1] 0.2451009
with(txhousing, cor(median, sales, use = "complete.obs"))
[1] 0.3449698
with(txhousing, cor(median, inventory, use = "complete.obs"))
[1] -0.1421893
with(txhousing, cor(sales, listings, use = "complete.obs"))
[1] 0.9214641
## correlation matrix of the six numeric columns (complete observations only)
print(
  cor(
    txhousing[, c("sales", "year", "inventory", "listings", "month", "median")],
    use = "complete.obs"
  )
)
                sales        year   inventory     listings        month      median
sales      1.00000000 -0.01619670 -0.19290223  0.921391249  0.017088462  0.33507418
year      -0.01619670  1.00000000  0.09757065 -0.054056894 -0.044882696  0.48289997
inventory -0.19290223  0.09757065  1.00000000 -0.100199728  0.014515758 -0.14218928
listings   0.92139125 -0.05405689 -0.10019973  1.000000000  0.005974225  0.24560270
month      0.01708846 -0.04488270  0.01451576  0.005974225  1.000000000  0.03699859
median     0.33507418  0.48289997 -0.14218928  0.245602698  0.036998586  1.00000000
# Linear model predicting median sales price from sales on the clean dataset,
# stored as mod1.
# (not used: lm(sales ~ median, data = txhousing))
mod1 <- lm(formula = median ~ sales, data = txhousing_clean)
print(mod1)

Call:
lm(formula = median ~ sales, data = txhousing_clean)

Coefficients:
(Intercept)        sales  
  121741.82        11.57  
## q16: predicted median price for the first row of the clean data

txhousing_clean[1, c("sales", "median")]

# Build the prediction from the fitted coefficients and the row's own sales
# value instead of hand-copied, rounded numbers, so Q16 stays in step with
# mod1 and agrees with predict() below.
Q16 <- coef(mod1)[["(Intercept)"]] +
  coef(mod1)[["sales"]] * txhousing_clean$sales[1]
predict(mod1, txhousing_clean[1, ])
       1 
122575.2 
# Q17: summary of mod1 (median ~ sales on the clean data), stored then shown
Q17 <- summary(mod1)
print(Q17)
# q18: correlation between sales and median in the clean data, to 2 decimals

round(
  with(txhousing_clean, cor(sales, median, use = "complete.obs")),
  digits = 2
)
[1] 0.34
# q19: add month as a second predictor of median sales price
# scipen has to be set through the options() function; an assignment such as
# `options = 999` only creates a variable and leaves number formatting as is.
options(scipen = 999) # strongly prefer fixed over scientific notation
mod2 <- lm(median ~ sales + month, data = txhousing_clean)
summary(mod2)

Call:
lm(formula = median ~ sales + month, data = txhousing_clean)

Residuals:
   Min     1Q Median     3Q    Max 
-70676 -26504  -3740  19360 164791 

Coefficients:
               Estimate  Std. Error t value             Pr(>|t|)    
(Intercept) 119423.8803    848.1637 140.803 < 0.0000000000000002 ***
sales           11.5564      0.3523  32.803 < 0.0000000000000002 ***
month          363.6054    113.9494   3.191              0.00142 ** 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 35050 on 7982 degrees of freedom
  (1 observation deleted due to missingness)
Multiple R-squared:  0.1201,    Adjusted R-squared:  0.1199 
F-statistic: 544.9 on 2 and 7982 DF,  p-value: < 0.00000000000000022
## R-squared is quite low (adjusted 0.1199): these two predictors explain only a small amount of the variability in median sales price. Square footage, location, number of bedrooms, and age of the home would probably give a better prediction.
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3J9CiNpbnN0YWxsLnBhY2thZ2VzKCJnZ3Bsb3QyIikKbGlicmFyeShnZ3Bsb3QyKQpgYGAKCmBgYHtyfQp0eGhvdXNpbmcgPC0gcmVhZC5jc3YoImhvdXNpbmcuY3N2IikKVmlldyh0eGhvdXNpbmcpCmBgYAoKYGBge3J9CmNsIDwtIGNsYXNzKHR4aG91c2luZykKbmFtIDwtIG5hbWVzKHR4aG91c2luZykKZGltZW4gPC0gZGltKHR4aG91c2luZykKUTEgPC0gYyhjbCwgbmFtLCBkaW1lbikKCmBgYAoKYGBge3J9ClEyIDwtIGxzLnN0cih0eGhvdXNpbmcpCmBgYAoKYGBge3J9ClEzIDwtIHN1bW1hcnkodHhob3VzaW5nJG1lZGlhbikKYGBgCgpgYGB7cn0KUTQgPC0gdGFibGUodHhob3VzaW5nJHllYXIpClE0CmBgYAoKYGBge3J9CnVuaXF1ZSh0eGhvdXNpbmckY2l0eSkKdGFibGUodHhob3VzaW5nJGNpdHkpClE1IDwtbGVuZ3RoKHVuaXF1ZSh0eGhvdXNpbmckY2l0eSkpCmBgYAoKYGBge3J9CmJveHBsb3QodHhob3VzaW5nJHNhbGVzLCAgaG9yaXpvbnRhbCA9IFRSVUUsIHhsYWI9InN0dWZmIiwgbWFpbj0iVGl0bGUiKQoKCmBgYAoKYGBge3J9CmJveHBsb3QodHhob3VzaW5nJHllYXIsIHR4aG91c2luZyRzYWxlcykKYGBgCgpgYGB7cn0KIyNob3Jpem9udGFsIGNvbXAgYm94cGxvdApib3hwbG90KHR4aG91c2luZyRzYWxlc350eGhvdXNpbmckeWVhciwgaG9yaXpvbnRhbCA9IFRSVUUsIHhsYWIgPSAoIlllYXIiKSxmcmFtZT1GQUxTRSwgeWxhYiA9ICgiU2FsZXMiKSkKCgpgYGAKCmBgYHtyfQojcTgKYm94cGxvdCh0eGhvdXNpbmckc2FsZXMgfiB0eGhvdXNpbmckeWVhciwKICAgICAgICBob3Jpem9udGFsID0gVFJVRSwKICAgICAgICBsYXMgPSAxLAogICAgICAgIHhsYWIgPSAiU2FsZXMiLAogICAgICAgIHlsYWIgPSAiWWVhciIsCiAgICAgICAgbWFpbiA9ICJTYWxlcyBieSBZZWFyIGluIFRleGFzIEhvdXNpbmciLAogICAgICAgIGZyYW1lID0gRkFMU0UpICAgIyBvciBidHkgPSAibiIKCgpib3hwbG90KHR4aG91c2luZyRtZWRpYW5+dHhob3VzaW5nJHllYXIsCiAgICAgICAgaG9yaXpvbnRhbCA9IFRSVUUsCiAgICAgICAgeGxhYiA9ICJNZWRpYW4gU2FsZSBQcmljZSIsCiAgICAgICAgeWxhYiA9ICJZZWFyIiwKICAgICAgICBtYWluPSJIb3VzaW5nIFByaWNlcyIsCiAgICAgICAgbGFzPTEsCiAgICAgICAgY29sb3I9ImxpZ2h0Z3JlZW4iLAogICAgICAgIGZyYW1lPUZBTFNFKQpgYGAKCmBgYHtyfQpib3hwbG90KHR4aG91c2luZyRzYWxlcyB+IHR4aG91c2luZyR5ZWFyLAogICAgICAgIGhvcml6b250YWwgPSBGQUxTRSwKICAgICAgICBsYXMgPSAxLAogICAgICAgIHhsYWIgPSAiU2FsZXMiLAogICAgICAgIHlsYWIgPSAiWWVhciIsCiAgICAgICAgbWFpbiA9ICJTYWxlcyBieSBZZWFyIGluIFRleGFzIEhvdXNpbmciLAogICAgICAgIGZyYW1lID0gRkFMU0UpICAgIyBvciBidHkgPSAibiIpCmBgYAoKYGBge3J9CiNxOEEKYm94cGxv
dCh0eGhvdXNpbmckbWVkaWFuIH4gdHhob3VzaW5nJHllYXIsCiAgICAgICAgaG9yaXpvbnRhbCA9IFRSVUUsCiAgICAgICAgbGFzID0gMSwgICAgICAgICAgICAgICAgICAgICAgICAgICAgIyByb3RhdGUgeS1heGlzIGxhYmVscyBob3Jpem9udGFsbHkKICAgICAgICB4bGFiID0gIk1lZGlhbiBTYWxlcyBQcmljZSIsCiAgICAgICAgeWxhYiA9ICJZZWFyIiwKICAgICAgICBtYWluID0gIk1lZGlhbiBTYWxlcyBQcmljZSBieSBZZWFyIiwKICAgICAgICBjb2wgPSAibGlnaHRncmVlbiIsICAgICAgICAgICAgICAgICMgY29sb3IgdGhlIGJveGVzIGxpZ2h0IGdyZWVuCiAgICAgICAgZnJhbWUgPSBGQUxTRSkgICAgICAgICAgICAgICAgICAgICAjIHJlbW92ZSB0aGUgcGxvdCBmcmFtZQoKYGBgCgpgYGB7cn0KI1E5YQpwbG90KHR4aG91c2luZyRzYWxlcywgdHhob3VzaW5nJG1lZGlhbiwKICAgICBwY2g9MTYsCiAgICAgbGFzPTEsCiAgICAgY2V4PS43NSkKCmFibGluZShsbShtZWRpYW5+c2FsZXMsIGRhdGE9dHhob3VzaW5nKSwKICAgICAgIGNvbD0icmVkIiwKICAgICAgIGx3ZD00KQpgYGAKCmBgYHtyfQpwbG90KG1lZGlhbiB+IHNhbGVzLAogICAgIGRhdGEgPSB0eGhvdXNpbmcsCiAgICAgcGNoID0gMTYsICAgICAgICAgICMgc29saWQgYmxhY2sgZG90cwogICAgIGNleCA9IDAuNzUsICAgICAgICAjIHNsaWdodGx5IHNtYWxsZXIgcG9pbnRzCiAgICAgeGxhYiA9ICJTYWxlcyIsCiAgICAgeWxhYiA9ICJNZWRpYW4gU2FsZXMgUHJpY2UiLAogICAgIG1haW4gPSAiTWVkaWFuIFByaWNlIHZzLiBTYWxlcyIpCgpgYGAKCmBgYHtyfQpRMTAgPC0gc3VtKGlzLm5hKHR4aG91c2luZyRtZWRpYW4pKQpgYGAKCmBgYHtyfQojc3VtKHByb3BvcnRpb25zKGlzLm5hKHR4aG91c2luZyRtZWRpYW4pKSkKUTExIDwtIG1lYW4oaXMubmEodHhob3VzaW5nJG1lZGlhbikpCgpgYGAKCmBgYHtyfQojcTEyCgpOQV9tZWlkYW5faW5kZXggPC0gYyh3aGljaChpcy5uYSh0eGhvdXNpbmckbWVkaWFuKSkpCmxlbmd0aChOQV9tZWlkYW5faW5kZXgpCmBgYAoKYGBge3J9CiNxMTMKdHhob3VzaW5nW05BX21laWRhbl9pbmRleCwgYygiY2l0eSIsICJ5ZWFyIiwgInNhbGVzIiwgIm1lZGlhbiIpXQoKYGBgCgpgYGB7cn0KI3ExNAp0eGhvdXNpbmdfY2xlYW4gPC0gdHhob3VzaW5nWy1OQV9tZWlkYW5faW5kZXgsIGMoImNpdHkiLCAieWVhciIsICJzYWxlcyIsICJtZWRpYW4iLCAibW9udGgiKV0KCmBgYAoKYGBge3J9CiMjY29ycmVsYXRpb24KCmNvcih0eGhvdXNpbmckbWVkaWFuLCB0eGhvdXNpbmckbGlzdGluZ3MsIHVzZT0iY29tcGxldGUub2JzIikKY29yKHR4aG91c2luZyRtZWRpYW4sIHR4aG91c2luZyRzYWxlcywgdXNlPSJjb21wbGV0ZS5vYnMiKQpjb3IodHhob3VzaW5nJG1lZGlhbiwgdHhob3VzaW5nJGludmVudG9yeSwgdXNlPSJjb21wbGV0ZS5vYnMiKQpjb3IodHhob3VzaW5nJHNhbGVzLCB0eGhvdXNpbmckbGlzdGluZ3MsIHVzZT0iY29tcGxldGUu
b2JzIikKYGBgCgpgYGB7cn0KIyNjb3JyZWxhdGlvbiBtYXRyaXgKcHJpbnQoY29yKHR4aG91c2luZ1ssIGMoJ3NhbGVzJywgJ3llYXInLCAnaW52ZW50b3J5JywgJ2xpc3RpbmdzJywgJ21vbnRoJywgJ21lZGlhbicpXSwgdXNlID0gImNvbXBsZXRlLm9icyIpKQpgYGAKCmBgYHtyfQojbG0oc2FsZXMgfiBtZWRpYW4sIGRhdGEgPSB0eGhvdXNpbmcpCm1vZDEgPC0gbG0obWVkaWFuIH4gc2FsZXMsIGRhdGEgPSB0eGhvdXNpbmdfY2xlYW4pCiNDcmVhdGUgYSBsaW5lYXIgbW9kZWwgcHJlZGljdGluZyBtZWRpYW4gc2FsZXMgcHJpY2UgYmFzZWQgb24gc2FsZXMgZm9yIHRoZSBjbGVhbiBkYXRhc2V0LiBTdG9yZSB0aGUgbW9kZWwgYXMgbW9kMS4KbW9kMQpgYGAKCmBgYHtyfQojI3ExNgoKdHhob3VzaW5nX2NsZWFuWzEsIGMoInNhbGVzIiwibWVkaWFuIildCgpRMTYgPC0gMTIxNzQxLjgyICsgMTEuNTcgKiA3MiAjIzcyIGlzIHRoZSBzYWxlcyBudW1iZXI/ICAKcHJlZGljdChtb2QxLCB0eGhvdXNpbmdfY2xlYW5bMSwgXSkKYGBgCgpgYGB7cn0KUTE3IDwtIHN1bW1hcnkobW9kMSkKc3VtbWFyeShsbShtZWRpYW5+c2FsZXMsIGRhdGE9dHhob3VzaW5nX2NsZWFuKSkKYGBgCgpgYGB7cn0KI3ExOAoKcm91bmQoY29yKHR4aG91c2luZ19jbGVhbiRzYWxlcywgdHhob3VzaW5nX2NsZWFuJG1lZGlhbiwgdXNlPSdjb21wbGV0ZS5vYnMnKSwgMikKYGBgCgpgYGB7cn0KI3ExOQpvcHRpb25zID0gOTk5ICMgdHVybiBvZmYgc2NpIG5vdGF0aW9uCm1vZDIgPC0gbG0obWVkaWFufnNhbGVzK21vbnRoLCBkYXRhPXR4aG91c2luZ19jbGVhbikKc3VtbWFyeShtb2QyKQpgYGAKCmBgYHtyfQojI3JzcXVhcmVkIGlzIHF1aXRlIGxvdyAuMTE5OSB0aGVzdCB0d28gcHJlZCBleHBsYWluIG9uIGEgYSBzbWFsbCBhbW91bnQgb2YgdGhlIHZhcmlhYmlsaXR5IGluIG1kZWlhbiBzYWxlcyBwcmljZS4gIHNxdWFyZSBmb290YWdlLCBsb2NhdGlvbiwgbnVtYmVyIG9mIGJlZHJvb21zIGFnZSBvZiBob21lIHByb2JhYmx5IGRvIGEgYmV0dGVyIHByZWRpY3Rpb24uICAKYGBgCg==